import pandas as pd
import argparse


# Load the Excel data
def read_data(file_path):
    """Read the Excel file at ``file_path`` and return what ``pd.read_excel`` yields.

    Args:
        file_path: Location of the Excel file, passed straight to
            ``pd.read_excel`` with default options.

    Returns:
        The object returned by ``pd.read_excel`` (a DataFrame for the
        default single-sheet read).
    """
    return pd.read_excel(file_path)


# Extract the committers and group them by repository path keyword
def extract_committers_by_repo(df):
    """Collect the de-duplicated committers for each repository path keyword.

    Only rows whose "码云仓状态" is one of "开始", "关闭" or "暂停" are
    considered. For every keyword, the rows whose "仓路径" contains that
    keyword as a literal substring contribute the comma-separated names found
    in their "Committers" cell.

    Args:
        df: DataFrame providing the columns "码云仓状态", "仓路径" and
            "Committers".

    Returns:
        A dict mapping each keyword ("openharmony", "openharmony-sig",
        "openharmony-tpc", in that order) to a set of whitespace-stripped,
        non-empty committer names.

    Raises:
        KeyError: If one of the required columns is missing from ``df``.
    """
    # NOTE(review): matching is by substring, so the "openharmony" bucket also
    # receives rows whose path contains "openharmony-sig" or "openharmony-tpc".
    # Confirm this overlap is intended.
    repo_keywords = ["openharmony", "openharmony-sig", "openharmony-tpc"]

    # Keep only repositories in one of the accepted states.
    valid_repos = df[df["码云仓状态"].isin(["开始", "关闭", "暂停"])]

    repo_committers = {}
    for keyword in repo_keywords:
        # na=False: a row with an empty path simply does not match; without it
        # the mask would contain NaN and boolean indexing would raise.
        # regex=False: the keyword is a literal substring, not a pattern.
        path_matches = valid_repos["仓路径"].str.contains(
            keyword, regex=False, na=False
        )
        matching_rows = valid_repos[path_matches]

        names = set()
        for cell in matching_rows["Committers"]:
            if pd.isna(cell):
                continue
            # str() guards against non-string cells (e.g. a numeric value read
            # from the spreadsheet), which have no .split().
            names.update(part.strip() for part in str(cell).split(","))

        # Drop the empty entry left behind by blank or trailing-comma items.
        names.discard("")
        repo_committers[keyword] = names

    return repo_committers


# Mirror a line of output to both the console and a file
def output_to_console_and_file(file, text):
    """Print ``text`` and also write it, followed by a newline, to ``file``.

    Args:
        file: Object with a ``write`` method that receives the line.
        text: The string to emit; it is concatenated with ``"\\n"`` for the
            file copy.
    """
    # Console first, so the terminal shows the line even if the write fails.
    print(text)
    line = text + "\n"
    file.write(line)


# Main routine
def main(file_path, output_file_path):
    """Report the de-duplicated committers for each repository path keyword.

    Reads the spreadsheet via ``read_data``, groups committers with
    ``extract_committers_by_repo``, and emits each group's sorted list and
    count to the console and to ``output_file_path``.

    Args:
        file_path: Path of the Excel file to read.
        output_file_path: Path of the text report; opened in "w" mode with
            UTF-8 encoding, so an existing file is overwritten.
    """
    committers_by_repo = extract_committers_by_repo(read_data(file_path))

    with open(output_file_path, "w", encoding="utf-8") as file:
        for repo, committers in committers_by_repo.items():
            # Sort up front so the list and the count come from the same data.
            names = sorted(committers)

            # One report section: header, caption, the names, then the total.
            report_lines = [
                f"\n仓库路径: {repo}",
                "去重后的 Committers 清单 (按字母顺序排序):",
                *names,
                f"去重后的 Committers 总数: {len(names)}",
            ]
            for line in report_lines:
                output_to_console_and_file(file, line)


if __name__ == "__main__":
    # Command-line entry point: two positional arguments, input then output.
    cli = argparse.ArgumentParser(description="统计 committers 数据")
    cli.add_argument("file_path", type=str, help="Excel 文件路径")
    cli.add_argument("output_file", type=str, help="输出文件路径")
    options = cli.parse_args()

    main(options.file_path, options.output_file)
